load(here::here("data", "working", "model_objects", "basic_model_bayes_no_interact.RData"))
load(here::here("data", "working", "model_objects", "basic_model_bayes_yes_interact.RData"))
load(here::here("data", "working", "model_objects", "basic_model_freq_no_interact.RData"))
load(here::here("data", "working", "model_objects", "basic_model_freq_yes_interact.RData"))
load(here::here("data", "working", "model_objects", "neighbor_model_bayes_no_interact.RData"))
load(here::here("data", "working", "model_objects", "neighbor_model_bayes_yes_interact.RData"))

library(rstanarm)
## Loading required package: Rcpp
## rstanarm (Version 2.19.3, packaged: 2020-02-11 05:16:41 UTC)
## - Do not expect the default priors to remain the same in future rstanarm versions.
## Thus, R scripts should specify priors explicitly, even if they are just the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
library(bayesplot)
## This is bayesplot version 1.7.2
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.0, PROJ 6.1.1
library(broom)
library(purrr)
library(glue)
## 
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
## 
##     collapse
library(stringr)
library(viridis)
## Loading required package: viridisLite
library(spdep)
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(ape)
## Registered S3 method overwritten by 'ape':
##   method   from 
##   plot.mst spdep
prepared_data <- readr::read_csv(here::here("data", "final", "response_time_model_data_prepared.csv"))
## Parsed with column specification:
## cols(
##   response_time_hundreths_of_minutes = col_double(),
##   patient_age = col_double(),
##   patient_gender = col_character(),
##   response_vehicle_type_collapsed = col_character(),
##   after_covid = col_logical(),
##   impression_category = col_character(),
##   possible_impression_category_collapsed = col_character(),
##   patient_first_race_collapsed = col_character(),
##   time_of_day = col_double(),
##   scene_gps_latitude = col_double(),
##   scene_gps_longitude = col_double(),
##   incident_dispatch_notified_to_unit_arrived_on_scene_in_minutes = col_double(),
##   log_trans_dispatch_time = col_double()
## )
prepared_data_sp <- sf::st_read(here::here("data", "final", "response_time_model_data_prepared_sp.geojson"))
## Reading layer `response_time_model_data_prepared_sp' from data source `/project/class/bii_sdad_dspg/uva/charlottesville-ems_equity/final/response_time_model_data_prepared_sp.geojson' using driver `GeoJSON'
## Simple feature collection with 51544 features and 15 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -78.83887 ymin: 37.72275 xmax: -78.20938 ymax: 38.27793
## CRS:            4326
freq_models <- list("basic_model_freq_no_interact" = basic_model_freq_no_interact,
                    "basic_model_freq_yes_interact" = basic_model_freq_yes_interact)

bayes_models <- stanreg_list("basic_model_bayes_no_interact" = basic_model_bayes_no_interact,
                             "basic_model_bayes_yes_interact" = basic_model_bayes_yes_interact,
                             "neighbor_model_bayes_no_interact" = neighbor_model_bayes_no_interact,
                             "neighbor_model_bayes_yes_interact" = neighbor_model_bayes_yes_interact)

Examine Coefficients

Setup everything

freq_model_coefs <- map(freq_models, tidy,
                        conf.int = TRUE,
                        conf.level = 0.95,
                        exponentiate = FALSE)

bayes_model_coefs <- map(bayes_models, ~tidy(.x$stanfit,
                                             estimate.method = "median",
                                             conf.int = TRUE,
                                             conf.level = 0.95)) %>%
  map(~filter(.x, !(term %in% c("sigma", "mean_PPD", "log-posterior"))))


freq_model_coefs_trans <- freq_model_coefs %>%
  map(~mutate(.x, across(c(estimate,
                           conf.low,
                           conf.high),
                         list(scale_factor = ~exp(.x * (.x != .x[1])), # scales to exponential, expression insures intercept is right
                              time_to_incident = ~exp(.x * (.x != .x[1]) + .x[1]))))) # scales to exponential, expression insures intercept is right

bayes_model_coefs_trans <- bayes_model_coefs %>%
  map(~mutate(.x, across(c(estimate,
                           conf.low,
                           conf.high),
                         list(scale_factor = ~exp(.x * (.x != .x[1])),
                              time_to_incident = ~exp(.x * (.x != .x[1]) + .x[1])))))


plot_scale_factor <- function(data) {
  ggplot(data, aes(y = term)) +
    geom_point(aes(x = estimate_scale_factor)) +
    geom_errorbar(aes(xmin = conf.low_scale_factor, xmax = conf.high_scale_factor)) +
    theme_minimal() +
    labs(x = NULL, y = glue("Scale Factor from {round(data$estimate_time_to_incident[1],
                                                      digits = 1)} Minutes")) +
    coord_cartesian(xlim = c(0.25, 1.75)) +
    geom_vline(xintercept = 1, alpha = 0.5)
}

plot_time_to_incident <- function(data) {
  ggplot(data) +
    geom_point(aes(x = term, y = estimate_time_to_incident)) +
    geom_errorbar(aes(x = term, ymin = conf.low_time_to_incident, ymax = conf.high_time_to_incident)) +
    theme_minimal() +
    coord_flip() +
    labs(x = NULL, y = glue("Travel Time in Minutes"))
}

Actually make the plots:

map(bayes_model_coefs_trans,
    plot_scale_factor)
## $basic_model_bayes_no_interact

## 
## $basic_model_bayes_yes_interact

## 
## $neighbor_model_bayes_no_interact

## 
## $neighbor_model_bayes_yes_interact

map(bayes_model_coefs_trans,
    plot_time_to_incident)
## $basic_model_bayes_no_interact

## 
## $basic_model_bayes_yes_interact

## 
## $neighbor_model_bayes_no_interact

## 
## $neighbor_model_bayes_yes_interact

Looks like the after covid interaction term doesn't do a lot in the non neighborhood models. In the neighborhood models however, it looks like times consistently increase.

Now for frequentist models:

map(freq_model_coefs_trans,
    plot_scale_factor)
## $basic_model_freq_no_interact

## 
## $basic_model_freq_yes_interact

map(freq_model_coefs_trans,
    plot_time_to_incident)
## $basic_model_freq_no_interact

## 
## $basic_model_freq_yes_interact

Basically identical to the bayesian models

After Covid Interaction Term

cbbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")



bayes_model_coefs_trans[[2]] %>%
  mutate(after_covid = str_detect(term, "(after_covidTRUE.*)")) %>%
  mutate(term = str_replace(term, "after_covidTRUE:", "")) %>%
  ggplot(aes(y = term, color = after_covid)) +
  geom_point(aes(x = estimate_scale_factor)) +
  geom_errorbar(aes(xmin = conf.low_scale_factor, xmax = conf.high_scale_factor)) +
  theme_minimal() +
  scale_color_manual(values = cbbPalette) +
  coord_cartesian(xlim = c(0.5, 1.5)) +
  geom_vline(xintercept = 1, alpha = 0.5) +
  labs(y = NULL, x = glue("Scale Factor from {round(bayes_model_coefs_trans[[2]]$estimate_time_to_incident[1],
                                                      digits = 1)} Minutes"),
       color = "During Covid")

bayes_model_coefs_trans[[4]] %>%
  mutate(after_covid = str_detect(term, "(after_covidTRUE.*)")) %>%
  mutate(term = str_replace(term, "after_covidTRUE:", "")) %>%
  ggplot(aes(y = term, color = after_covid)) +
  geom_point(aes(x = estimate_scale_factor)) +
  geom_errorbar(aes(xmin = conf.low_scale_factor, xmax = conf.high_scale_factor)) +
  theme_minimal() +
  scale_color_manual(values = cbbPalette) +
  coord_cartesian(xlim = c(0.5, 1.5)) +
  geom_vline(xintercept = 1, alpha = 0.5) +
  labs(y = NULL, x = glue("Scale Factor from {round(bayes_model_coefs_trans[[4]]$estimate_time_to_incident[1],
                                                      digits = 1)} Minutes"),
       color = "During Covid")

Neighborhood Coefficients

neighborhood_coefs <- map(bayes_model_coefs_trans, ~.x %>% 
    filter(str_detect(term, r"(b\[)")) %>% 
    mutate(NAME = str_extract(term, r"((?<=NAME:).*(?=\]))")) %>% 
    mutate(NAME = str_replace_all(NAME, "_", " ")))[3:4]

neighborhood_coefs_sp <- map(neighborhood_coefs, ~left_join(prepared_data_sp %>% 
  group_by(NAME) %>% 
  slice_head(1) %>% 
  ungroup(), .x, by = "NAME"))

neighborhood_coefs_sp$neighbor_model_bayes_no_interact %>% 
  ggplot() +
  geom_sf(aes(fill = estimate_scale_factor))

Technical Examination

I will only be examining bayesian fits because they are almost identical to the frequentist fits.

First we'll look at the loo statistics

(loo_list <- map(bayes_models, loo))
## Warning: Found 1 observation(s) with a pareto_k > 0.7. We recommend calling 'loo' again with argument 'k_threshold = 0.7' in order to calculate the ELPD without the assumption that these observations are negligible. This will refit the model 1 times to compute the ELPDs for the problematic observations directly.

## Warning: Found 1 observation(s) with a pareto_k > 0.7. We recommend calling 'loo' again with argument 'k_threshold = 0.7' in order to calculate the ELPD without the assumption that these observations are negligible. This will refit the model 1 times to compute the ELPDs for the problematic observations directly.
## $basic_model_bayes_no_interact
## 
## Computed from 10000 by 51544 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo -31745.5 293.0
## p_loo        25.2   0.7
## looic     63491.0 586.0
## ------
## Monte Carlo SE of elpd_loo is 0.0.
## 
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
## 
## $basic_model_bayes_yes_interact
## 
## Computed from 25000 by 51544 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo -31755.8 292.8
## p_loo        45.5   1.5
## looic     63511.7 585.6
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     51543 100.0%  8953      
##  (0.5, 0.7]   (ok)           0   0.0%  <NA>      
##    (0.7, 1]   (bad)          1   0.0%  55        
##    (1, Inf)   (very bad)     0   0.0%  <NA>      
## See help('pareto-k-diagnostic') for details.
## 
## $neighbor_model_bayes_no_interact
## 
## Computed from 25000 by 51544 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo -24799.8 372.3
## p_loo        65.2   1.9
## looic     49599.5 744.5
## ------
## Monte Carlo SE of elpd_loo is 0.0.
## 
## All Pareto k estimates are good (k < 0.5).
## See help('pareto-k-diagnostic') for details.
## 
## $neighbor_model_bayes_yes_interact
## 
## Computed from 25000 by 51544 log-likelihood matrix
## 
##          Estimate    SE
## elpd_loo -24807.8 372.0
## p_loo        85.1   2.3
## looic     49615.5 744.1
## ------
## Monte Carlo SE of elpd_loo is NA.
## 
## Pareto k diagnostic values:
##                          Count Pct.    Min. n_eff
## (-Inf, 0.5]   (good)     51543 100.0%  5851      
##  (0.5, 0.7]   (ok)           0   0.0%  <NA>      
##    (0.7, 1]   (bad)          1   0.0%  100       
##    (1, Inf)   (very bad)     0   0.0%  <NA>      
## See help('pareto-k-diagnostic') for details.
loo_compare(loo_list)
##                                   elpd_diff se_diff
## neighbor_model_bayes_no_interact      0.0       0.0
## neighbor_model_bayes_yes_interact    -8.0       5.3
## basic_model_bayes_no_interact     -6945.8     149.6
## basic_model_bayes_yes_interact    -6956.1     149.8

Looks like the neighborhood level models do a lot! better, and they all have reasonable specifications

map(bayes_models, pp_check)
## $basic_model_bayes_no_interact

## 
## $basic_model_bayes_yes_interact

## 
## $neighbor_model_bayes_no_interact

## 
## $neighbor_model_bayes_yes_interact

The capture the majority of model variation, the remaining portion is likely due to it not being truly normal. Maybe a proper spatial mixed effects model could solve that?

Lets now check the residuals on a map:

bayes_residuals <- map(bayes_models, residuals)

augmented_data <- prepared_data_sp %>% 
  mutate(resid_basic_no = bayes_residuals$basic_model_bayes_no_interact,
         resid_basic_yes = bayes_residuals$basic_model_bayes_yes_interact,
         resid_neighbor_no = bayes_residuals$neighbor_model_bayes_no_interact,
         resid_neighbor_yes = bayes_residuals$neighbor_model_bayes_yes_interact)

Basic Linear Model with No Interaction Term

augmented_data %>% 
  ggplot() +
  stat_summary_hex(aes(x = scene_gps_longitude, 
                       y = scene_gps_latitude,
                       z = resid_basic_no)) +
  geom_sf(fill = NA,
          color = "#444444",
          alpha = 0.3,
          size = 0.1) +
  scale_fill_viridis() +
  theme_minimal() +
  labs(x = "Longitude", 
       y = "Latitude",
       fill = "Mean Residuals",
       title = "Basic Linear Model, No Interactions") 

Basic Linear Model with Interaction Term

augmented_data %>% 
  ggplot() +
  stat_summary_hex(aes(x = scene_gps_longitude, 
                       y = scene_gps_latitude,
                       z = resid_basic_yes)) +
  geom_sf(fill = NA,
          color = "#444444",
          alpha = 0.3,
          size = 0.1) +
  scale_fill_viridis() +
  theme_minimal() +
  labs(x = "Longitude", 
       y = "Latitude",
       fill = "Mean Residuals",
       title = "Basic Linear Model, Interactions") 

Neighborhood Model with No Interaction Term

augmented_data %>% 
  ggplot() +
  stat_summary_hex(aes(x = scene_gps_longitude, 
                       y = scene_gps_latitude,
                       z = resid_neighbor_no)) +
  geom_sf(fill = NA,
          color = "#444444",
          alpha = 0.3,
          size = 0.1) +
  scale_fill_viridis() +
  theme_minimal() +
  labs(x = "Longitude", 
       y = "Latitude",
       fill = "Mean Residuals",
       title = "Neighborhood Model, No Interactions") 

Neighborhood Model with Interaction Term

augmented_data %>% 
  ggplot() +
  stat_summary_hex(aes(x = scene_gps_longitude, 
                       y = scene_gps_latitude,
                       z = resid_neighbor_yes)) +
  geom_sf(fill = NA,
          color = "#444444",
          alpha = 0.3,
          size = 0.1) +
  scale_fill_viridis() +
  theme_minimal() +
  labs(x = "Longitude", 
       y = "Latitude",
       fill = "Mean Residuals",
       title = "Neighborhood Model, Interactions") 

Measures of Spatial Autocorrelation

set.seed(451)

augmented_data_sampled <- augmented_data %>% 
  sample_n(2000)

distances <- as.matrix(dist(cbind(augmented_data_sampled$scene_gps_longitude, augmented_data_sampled$scene_gps_latitude)))

distances_inv <- 1 / distances

diag(distances) <- 0
distances_inv[is.infinite(distances_inv)] <- 0

First I'll use ape for global Moran's I statistics

ape::Moran.I(augmented_data_sampled$resid_basic_no, distances_inv)
## $observed
## [1] 0.06648128
## 
## $expected
## [1] -0.0005002501
## 
## $sd
## [1] 0.004039064
## 
## $p.value
## [1] 0
ape::Moran.I(augmented_data_sampled$resid_basic_yes, distances_inv)
## $observed
## [1] 0.06661546
## 
## $expected
## [1] -0.0005002501
## 
## $sd
## [1] 0.004039059
## 
## $p.value
## [1] 0
ape::Moran.I(augmented_data_sampled$resid_neighbor_no, distances_inv)
## $observed
## [1] 0.02011118
## 
## $expected
## [1] -0.0005002501
## 
## $sd
## [1] 0.004036932
## 
## $p.value
## [1] 3.295459e-07
ape::Moran.I(augmented_data_sampled$resid_neighbor_yes, distances_inv)
## $observed
## [1] 0.02022165
## 
## $expected
## [1] -0.0005002501
## 
## $sd
## [1] 0.00403693
## 
## $p.value
## [1] 2.850324e-07

Next we'll check for local spatial autocorrelation using spdep

morans_list <- augmented_data_sampled %>% 
  st_drop_geometry() %>% 
  select(resid_basic_no:resid_neighbor_yes) %>% 
  map(~localmoran(.x, mat2listw(distances_inv)))

Lets plot the z-scores

map2(morans_list, names(morans_list), function(moran_info, variable_name) {
  z_score <- moran_info[,4]
  
  augmented_data_sampled %>% 
    ggplot() +
    stat_summary_hex(aes(x = scene_gps_longitude, 
                         y = scene_gps_latitude,
                         z = z_score)) +
    geom_sf(fill = NA,
            color = "#444444",
            alpha = 0.3,
            size = 0.1) +
    theme_minimal() +
    labs(x = "Longitude", 
         y = "Latitude",
         fill = "Mean Z-Score",
         title = variable_name) +
    scale_fill_gradient2()
})
## $resid_basic_no

## 
## $resid_basic_yes

## 
## $resid_neighbor_no

## 
## $resid_neighbor_yes

Looks like when neighborhood terms are included, spatial autocorrelation completly dissapears. This is robust across several samples, but only one is shown here due to computational issues.